In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import lifelines
Import Data
In [2]:
# Load the credit-card default dataset directly from its GitHub-hosted CSV.
csv_url = 'https://raw.githubusercontent.com/fclesio/learning-space/master/Datasets/02%20-%20Classification/default_credit_card.csv'
df = pd.read_csv(csv_url)
In [3]:
# Preview the first five rows of the raw frame.
df.head(n=5)
Out[3]:
In [53]:
# Predictors: column positions 0 through 23 (the first 24 columns of df).
# NOTE(review): if column 0 is a row identifier it probably should not be
# used as a predictor — confirm against the dataset's header.
loans_X = df.iloc[:, list(range(24))]
In [54]:
# Outcome: column position 24 only. Indexing with a list (rather than a bare
# integer) keeps the result a one-column DataFrame instead of a Series.
outcome_cols = [24]
loans_Y = df.iloc[:, outcome_cols]
In [55]:
# Preview the first five rows of the predictor frame.
loans_X.head(5)
Out[55]:
In [56]:
# Preview the first five rows of the outcome frame.
loans_Y.head(5)
Out[56]:
In [57]:
###
### Generate Training and Testing Set
###
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`. Fall back to
# the old module only on installations that predate the move.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
#   X_train: independent (feature) variables for the training set
#   Y_train: dependent (outcome) variable for the training set
#   X_test:  independent (feature) variables for the test set
#   Y_test:  dependent (outcome) variable for the test set
X_train, X_test, Y_train, Y_test = train_test_split(
    loans_X, loans_Y, test_size=0.2, random_state=0)
In [ ]: